package com.share.example.spider.processor;

import com.share.common.utils.SpiderUtil;
import com.share.example.spider.model.db.Fund;
import com.share.example.spider.model.dto.FundDto;
import lombok.extern.slf4j.Slf4j;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Html;

/**
 * WebMagic {@link PageProcessor} that reads one fund detail page and publishes
 * the extracted values as a {@link FundDto}.
 *
 * <p>Every selector is an absolute, position-based XPath, so extraction is tied
 * to the exact page layout. A selector that matches nothing yields {@code null},
 * which is handed to {@link SpiderUtil#filter} unchanged.
 *
 * @author 01401061 - 孙艳强
 * @date 2020/10/28 15:26
 */
@Slf4j
public class FundProcessor implements PageProcessor {

    /** Container that every selector below is anchored to. */
    private static final String ROOT = "//*[@id=\"body\"]/div[12]/div/div";

    /** Header element holding the fund name and, nested in spans, the fund code. */
    private static final String HEADER = ROOT + "/div[1]/div[1]/div";

    /** Block of {@code dl/dd} cells holding net worth and the period return rates. */
    private static final String DATA = ROOT + "/div[3]/div[1]/div[1]";

    /** Info table holding risk type, scale and manager in its first row. */
    private static final String INFO = ROOT + "/div[3]/div[1]/div[2]/table/tbody";

    /**
     * Extracts the fund fields from the page and stores the resulting
     * {@link FundDto} in the page's result items under the key
     * {@code Fund.class.getName()}.
     *
     * <p>No follow-up links are added to the crawl queue here.
     *
     * <p>NOTE(review): the establishment-date cell ({@code INFO + "/tr[2]/td[1]/text()"})
     * used to be read here but was never stored and could throw a
     * {@link NullPointerException}; it was dropped. Re-add it together with the
     * matching setter if the value is needed downstream.
     *
     * @param page the downloaded page to parse
     */
    @Override
    public void process(Page page) {
        Html html = page.getHtml();
        FundDto data = new FundDto();

        data.setFundCode(SpiderUtil.filter(text(html, HEADER + "/span[2]/span[1]/text()")));
        data.setName(SpiderUtil.filter(text(html, HEADER + "/text()")));

        // Scale: the original selector was two XPaths pasted together and could
        // never match. It is the second cell of the first info row, between the
        // risk type (td[1]) and the manager (td[3]). The raw text carries a
        // leading label colon, which is stripped null-safely.
        String scalePrice = stripLabelColon(text(html, INFO + "/tr[1]/td[2]/text()"));
        data.setScalePrice(SpiderUtil.filter(scalePrice));

        data.setNetWorth(SpiderUtil.filter(text(html, DATA + "/dl[2]/dd[1]/span[1]/text()")));
        data.setAccumWorth(SpiderUtil.filter(text(html, DATA + "/dl[3]/dd[1]/span/text()")));
        data.setRiskType(SpiderUtil.filter(text(html, INFO + "/tr[1]/td[1]/a/text()")));
        data.setEarningRate(SpiderUtil.filter(text(html, DATA + "/dl[2]/dd[1]/span[2]/text()")));

        // Period return rates: dl[n] selects the column, dd[n] the row.
        data.setMonth1Rate(SpiderUtil.filter(text(html, DATA + "/dl[1]/dd[2]/span[2]/text()")));
        data.setMonth3Rate(SpiderUtil.filter(text(html, DATA + "/dl[2]/dd[2]/span[2]/text()")));
        data.setMonth6Rate(SpiderUtil.filter(text(html, DATA + "/dl[3]/dd[2]/span[2]/text()")));

        data.setYear1Rate(SpiderUtil.filter(text(html, DATA + "/dl[1]/dd[3]/span[2]/text()")));
        data.setYear3Rate(SpiderUtil.filter(text(html, DATA + "/dl[2]/dd[3]/span[2]/text()")));
        // Cumulative rate (presumably since the fund was established — confirm).
        data.setEstabRate(SpiderUtil.filter(text(html, DATA + "/dl[3]/dd[3]/span[2]/text()")));

        data.setManager(SpiderUtil.filter(text(html, INFO + "/tr[1]/td[3]/a/text()")));

        page.putField(Fund.class.getName(), data);
    }

    /**
     * Builds the crawl settings for this processor: the target domain and the
     * User-Agent sent with each request.
     *
     * @return a new {@link Site} configured for {@code fund.eastmoney.com}
     */
    @Override
    public Site getSite() {
        Site site = new Site();
        site.setDomain("fund.eastmoney.com");
        // Header value only: the former "User-Agent: " prefix was sent as part of the value.
        site.setUserAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36");
        return site;
    }

    /**
     * Returns the first match of {@code xpath} in {@code html}.
     *
     * @return the matched text, or {@code null} when nothing matches
     */
    private static String text(Html html, String xpath) {
        return html.xpath(xpath).get();
    }

    /**
     * Removes ASCII and full-width colons from a table-cell value and trims it.
     *
     * @param raw the raw cell text, may be {@code null}
     * @return the cleaned text, or {@code null} when {@code raw} is {@code null}
     */
    private static String stripLabelColon(String raw) {
        if (raw == null) {
            return null;
        }
        return raw.replace(":", "").replace("：", "").trim();
    }
}
